Back

netCDF4 File Checker

Library

Python
import netCDF4 as nc
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

NetCDF4 Path

Python
# Path to the folder that contains the NetCDF files (every cell below lists
# or joins file names against this folder)
# NOTE(review): the path has no drive letter, so on Windows it presumably
# resolves against the current drive — confirm it points at the intended folder
folder_path = r'\Downloaded\GPM'

Check Function

Python
# Scan the folder and report every NetCDF file that cannot be opened
for file_name in os.listdir(folder_path):
    # Anything that is not a NetCDF file (.nc / .nc4) is skipped outright
    if not file_name.endswith(('.nc', '.nc4')):
        continue

    file_path = os.path.join(folder_path, file_name)
    try:
        # Opening the file is the whole check; a readable file is closed
        # again straight away
        dataset = nc.Dataset(file_path, 'r')
        dataset.close()
    except OSError as e:
        # Good files produce no output; only failures are printed
        print(f"Error opening {file_name}: {e}")

NetCDF Data Explorer and Plotter

Python
def explore_netcdf_structure(file_path):
    """
    Print a summary of the structure of a NetCDF file.

    The summary covers the file format and data model, every global
    attribute, every dimension (flagging unlimited ones) and every variable
    with its dtype, dimensions and shape plus, when present, its units and
    long name.

    Parameters:
    - file_path: path to the NetCDF file to inspect

    Returns:
    - True if the file was opened and fully summarized, False if any error
      occurred (the error is printed, not raised)
    """
    try:
        # The context manager releases the file handle even when reading an
        # attribute or variable fails part-way through the summary.
        with nc.Dataset(file_path, 'r') as dataset:
            print(f"=== NetCDF File: {os.path.basename(file_path)} ===")
            print(f"File format: {dataset.file_format}")
            print(f"Data model: {dataset.data_model}")

            # Global attributes
            print("\n--- Global Attributes ---")
            for attr in dataset.ncattrs():
                # getncattr reads the NetCDF attribute itself, so a name that
                # collides with a Python attribute of the Dataset object
                # (e.g. "variables") is still reported correctly.
                print(f"{attr}: {dataset.getncattr(attr)}")

            # Dimensions
            print("\n--- Dimensions ---")
            for dim_name, dim in dataset.dimensions.items():
                print(f"{dim_name}: {len(dim)} {'(unlimited)' if dim.isunlimited() else ''}")

            # Variables
            print("\n--- Variables ---")
            for var_name, var in dataset.variables.items():
                print(f"{var_name}: {var.dtype} {var.dimensions}")
                if hasattr(var, 'units'):
                    print(f"  Units: {var.units}")
                if hasattr(var, 'long_name'):
                    print(f"  Long name: {var.long_name}")
                print(f"  Shape: {var.shape}")
                print()

        return True
    except Exception as e:
        # Deliberately broad: callers rely on a True/False result instead of
        # an exception, whatever went wrong while opening or reading.
        print(f"Error reading {file_path}: {e}")
        return False
Python
def plot_netcdf_data(file_path, variable_name=None, time_index=0, plot_type='contour'):
    """
    Plot one 2-D slice of a NetCDF variable.

    Parameters:
    - file_path: path to NetCDF file
    - variable_name: name of variable to plot (if None, the first variable
      that has at least two dimensions and is not named like a coordinate
      is used)
    - time_index: index taken along the first axis for 4-D variables and
      for 3-D variables that have a 'time' dimension; other 3-D variables
      use index 0 and 2-D variables are plotted whole
    - plot_type: 'contour', 'pcolormesh' or 'imshow' (any other value is
      treated as 'imshow')

    Returns:
    - None. Problems (unreadable file, unknown variable, unsupported shape,
      plotting errors) are printed rather than raised.
    """
    # Variable names treated as coordinates, never as plottable data
    coord_names = ('lat', 'lon', 'latitude', 'longitude', 'time')
    lat_names = ('lat', 'latitude', 'y')
    lon_names = ('lon', 'longitude', 'x')

    try:
        # The context manager closes the file on every exit path: the early
        # returns below, normal completion, and any exception.
        with nc.Dataset(file_path, 'r') as dataset:
            # Auto-detect main data variable if not specified
            if variable_name is None:
                # Any non-coordinate variable with 2+ dimensions can be
                # reduced to a 2-D slice further down
                data_vars = [name for name, var_obj in dataset.variables.items()
                             if len(var_obj.dimensions) >= 2 and name not in coord_names]
                if not data_vars:
                    print("No suitable data variable found")
                    return
                variable_name = data_vars[0]
                print(f"Auto-detected variable: {variable_name}")

            # Get the variable
            if variable_name not in dataset.variables:
                print(f"Variable '{variable_name}' not found in the dataset")
                available_vars = list(dataset.variables.keys())
                print(f"Available variables: {available_vars}")
                return

            var = dataset.variables[variable_name]
            dims = var.dimensions
            print(f"Variable dimensions: {dims}")

            # First matching coordinate variable by name, or None if absent
            lat_var = next((dataset.variables[name] for name in lat_names
                            if name in dataset.variables), None)
            lon_var = next((dataset.variables[name] for name in lon_names
                            if name in dataset.variables), None)

            # Extract a 2-D slice
            ndim = len(var.shape)
            if ndim == 4:  # time, level, lat, lon
                data = var[time_index, 0, :, :]
            elif ndim == 3:  # time, lat, lon or level, lat, lon
                first_index = time_index if 'time' in dims else 0
                data = var[first_index, :, :]
            elif ndim == 2:  # lat, lon
                data = var[:, :]
            else:
                print(f"Unsupported data shape: {var.shape}")
                return

            # Create the plot
            fig, ax = plt.subplots(figsize=(16, 6))

            if lat_var is not None and lon_var is not None:
                lat = lat_var[:]
                lon = lon_var[:]

                # The plotting calls expect rows = latitude, columns =
                # longitude. When the 1-D coordinate sizes show the slice is
                # stored as (lon, lat) instead, flip it rather than failing
                # on a shape mismatch.
                if (lat.ndim == 1 and lon.ndim == 1
                        and data.shape == (lon.size, lat.size)
                        and data.shape != (lat.size, lon.size)):
                    data = data.T

                if plot_type == 'contour':
                    cs = ax.contour(lon, lat, data, levels=20)
                    ax.clabel(cs, inline=True, fontsize=6)
                    cf = ax.contourf(lon, lat, data, levels=20, alpha=0.7)
                    plt.colorbar(cf, ax=ax, shrink=0.6)
                elif plot_type == 'pcolormesh':
                    cf = ax.pcolormesh(lon, lat, data, shading='auto')
                    plt.colorbar(cf, ax=ax, shrink=0.6)
                else:  # imshow
                    # NOTE(review): origin='lower' with this extent assumes
                    # latitude increases along the first axis — confirm
                    cf = ax.imshow(data, extent=[lon.min(), lon.max(), lat.min(), lat.max()],
                                   origin='lower', aspect='auto')
                    plt.colorbar(cf, ax=ax, shrink=0.6)

                ax.set_xlabel('Longitude')
                ax.set_ylabel('Latitude')
            else:
                # Plot without coordinates
                cf = ax.imshow(data, origin='lower', aspect='auto')
                plt.colorbar(cf, ax=ax, shrink=0.8)
                ax.set_xlabel('Grid X')
                ax.set_ylabel('Grid Y')

            # Add title and labels
            title = f"{variable_name}"
            if hasattr(var, 'long_name'):
                title = f"{var.long_name}"
            if hasattr(var, 'units'):
                title += f" ({var.units})"

            ax.set_title(f"{title}\nFile: {os.path.basename(file_path)}")

            # Add statistics over valid cells only. masked_invalid() masks
            # NaN/inf on top of any existing fill-value mask and compressed()
            # drops the masked cells, so a fully masked slice yields an empty
            # array instead of a masked min/max that cannot be formatted.
            valid_data = np.ma.masked_invalid(data).compressed()
            if valid_data.size > 0:
                stats_text = f"Min: {valid_data.min():.3f}, Max: {valid_data.max():.3f}, Mean: {valid_data.mean():.3f}"
                ax.text(0.02, 0.02, stats_text, transform=ax.transAxes,
                        bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))

            plt.tight_layout()
            plt.show()

    except Exception as e:
        # Deliberately broad: this is an interactive helper that reports
        # problems instead of interrupting the notebook.
        print(f"Error plotting data: {e}")

Explore NetCDF Files in the Folder

Python
# Collect the full paths of all NetCDF files in the folder.
# os.listdir() returns names in arbitrary order, so they are sorted to make
# the listing, and the choice of "first" file in later cells, reproducible.
netcdf_files = []
if os.path.isdir(folder_path):  # a missing or non-directory path yields an empty list
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.endswith(('.nc', '.nc4')):
            file_path = os.path.join(folder_path, file_name)
            netcdf_files.append(file_path)

print(f"Found {len(netcdf_files)} NetCDF files:")
for i, file_path in enumerate(netcdf_files[:10], start=1):  # Show first 10 files
    print(f"{i}. {os.path.basename(file_path)}")
if len(netcdf_files) > 10:
    print(f"... and {len(netcdf_files) - 10} more files")

Explore Structure of First NetCDF File

Python
# Print the structure summary of the first collected file; nothing happens
# when no NetCDF files were found
if netcdf_files:
    first_file, *_ = netcdf_files
    explore_netcdf_structure(first_file)

Plot NetCDF Data

Python
# Plot one specific GPM file from the data folder (auto-detect variable).
# The path is built from folder_path so it follows the folder configured
# above instead of repeating it as a second hard-coded absolute path.
if netcdf_files:
    first_file = os.path.join(
        folder_path,
        "shapeMasked.scrubbed.GPM_3IMERGDL_06_precipitationCal.20210114.nc",
    )
    print("Plotting with auto-detected variable...")
    plot_netcdf_data(first_file, plot_type='pcolormesh')

Custom Plotting Options

You can customize the plot by passing a specific `variable_name`, `time_index`, and `plot_type` to `plot_netcdf_data`:

Python
# Usage hints for plotting a chosen variable with a chosen plot type
if netcdf_files:
    file_to_plot = netcdf_files[0]

    # Ready-made calls; uncomment one and adjust the arguments as needed:
    # plot_netcdf_data(file_to_plot, variable_name='precipitationCal', plot_type='contour')
    # plot_netcdf_data(file_to_plot, variable_name='precipitationCal', time_index=0, plot_type='imshow')

    # One print call, one hint per output line
    print(
        "To plot a specific variable, use:",
        "plot_netcdf_data(file_path, variable_name='your_variable', plot_type='contour')",
        "Available plot types: 'contour', 'pcolormesh', 'imshow'",
        sep="\n",
    )